# Estimate quantity regressions
# TODO: Uncertain if we are including these in the paper - so
pacman::p_load(tidyverse, fixest, qs, fst)

# Setup -----
rm(list = ls())

source("code/globals.R")

# Load cleaned home data
parcels <- read_fst(file.path(WORKING, "statewide-sample.fst"))

# Aggregate to get # of homes built in each year by block and responsibility area.
summarize_by_distance <- function(data) {
  # data = parcels %>% filter(border_1000)
  # END DEBUG

  blocks_in_buffer <- data %>%
    group_by(block, tract, blockgroup, county = substr(block, 1, 5), treatmentgroup) %>%
    summarize(
      homes_total = n(),
      homes_pre1998 = sum(combinedyear < 1998, na.rm = T),
      homes_post1998 = sum(combinedyear >= 1998, na.rm = T),
      avg_slope = mean(slope, na.rm = T),
      avg_elev = mean(elev, na.rm = T),
      avg_lot_size = mean(LotSizeAcres, na.rm = T),
      avg_sqft = mean(sqfeet / 1000, na.rm = T),
      avg_bedrooms = mean(TotalBedrooms, na.rm = T),
      avg_hazard = mean(wildfire_hazard, na.rm = T),
      avg_combinedyear = mean(combinedyear, na.rm = T)
    ) %>%
    ungroup()
  
  # Eliminate blocks in more than one treatment group
  blocks_in_buffer <- blocks_in_buffer %>% group_by(block) %>% filter(n_distinct(treatmentgroup) == 1) %>% ungroup()

  blocks_in_buffer <- blocks_in_buffer %>%
    mutate(
      log_homes_total = log(homes_total),
      log_old_homes = log(homes_pre1998),
      log_new_homes = log(homes_post1998),
      growth_rate = (homes_post1998 / homes_pre1998),
      change_in_homes = homes_post1998 - homes_pre1998,
      log_growth_rate = log(growth_rate)
    )

  # Compute 95% of growth rate (if not infinite)
  growth_rate_95pct <- quantile(blocks_in_buffer$growth_rate[is.finite(blocks_in_buffer$growth_rate)], probs = 0.95, na.rm = T)
  blocks_in_buffer <- blocks_in_buffer %>%
    mutate(growth_rate_cap = ifelse(growth_rate > growth_rate_95pct, growth_rate_95pct, growth_rate))

  blocks_in_buffer
}

blocks_homes_250 <- summarize_by_distance(parcels %>% filter(border_250))
blocks_homes_500 <- summarize_by_distance(parcels %>% filter(border_500))
blocks_homes_1000 <- summarize_by_distance(parcels %>% filter(border_1000))
# blocks_homes_2000 <- summarize_by_distance(parcels %>% filter(border_2000))

# Estimate quantity regressions
fits <- list()

# fits$buff2000_tract <- feols(growth_rate_cap ~ i(treatmentgroup, ref = "No-codes") + avg_hazard + avg_lot_size + avg_sqft + avg_bedrooms  | tract, data = blocks_homes_2000, weights = ~homes_pre1998)

# Main regressions use 1000m buffer

# 1000m buffer
fits$buff1000_tract <- feols(growth_rate_cap ~ i(treatmentgroup, ref = "No-codes") + avg_slope | tract, data = blocks_homes_1000, weights = ~homes_pre1998)

fmla_quantity_controls <- as.formula(paste("growth_rate_cap ~ i(treatmentgroup, ref = 'No-codes') + avg_slope + avg_elev + avg_hazard + avg_lot_size + avg_sqft + avg_bedrooms | tract"))

# Add other controls
fits$buff1000_tract_controls <- feols(fmla_quantity_controls, data = blocks_homes_1000, weights = ~homes_pre1998)


# Only places that had at least 5 pre-1998 homes
fits$buff1000_tract_min10 <- feols(fmla_quantity_controls, data = blocks_homes_1000 %>% filter(homes_pre1998 >= 5), weights = ~homes_pre1998)


# 500, 250 

fits$buff500_tract <- feols(fmla_quantity_controls, data = blocks_homes_500, weights = ~homes_pre1998)

fits$buff250_tract <- feols(fmla_quantity_controls, data = blocks_homes_250, weights = ~homes_pre1998)





etable(fits,
  cluster = ~tract,
  depvar = T,
  tex = T,
  digits = "s3",
  digits.stats = "s2",
  group = list(
    "Additional Controls" = c("Square feet", "Elevation", "Bedrooms", "Lot size", "Wildfire hazard")
  ),
  extralines = list(
    "-_Border size" = c("1 km", "1 km", "1 km", "500 m", "250 m"),
    "-_Sub-sample" = c("All", "All", "Min. 10\npre-code", "All", "All")
  ),
  order = c("SRA", "LRA")
) %>%
  write("results/quantity-regs.tex")
